'''Copyright (c) [2019] [范志俊]
[nowcoderSpider] is licensed under the Mulan PSL v1.
You can use this software according to the terms and conditions of the Mulan PSL v1.
You may obtain a copy of Mulan PSL v1 at:
    http://license.coscl.org.cn/MulanPSL
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
PURPOSE.
See the Mulan PSL v1 for more details.'''
import re
import pymysql
from selenium import webdriver
import time
import random
# Log in
def login():
    """Open the login page and submit the account credentials.

    Uses the module-level ``browser`` (Selenium driver) and ``url`` that the
    script entry point defines; the page at ``url`` is loaded, the account
    and password inputs are filled in, and the login button is clicked.
    """
    print('登录')
    browser.get(url)
    # (input element id, text to type) — replace both XXXXX placeholders
    # with your own account and password before running.
    form_fields = (
        ('jsEmailIpt', 'XXXXX'),     # account
        ('jsPasswordIpt', 'XXXXX'),  # password
    )
    for field_id, text in form_fields:
        browser.find_element_by_id(field_id).send_keys(text)
    browser.find_element_by_id('jsLoginBtn').click()
# Collect the data of the current listing page
def get_page_data(url):
    """Scrape every paper listed on the current page into the database.

    Each paper entry on the page is opened and submitted, then its
    answer-analysis view is walked question by question.  One row per
    question is inserted into the ``nowcoder`` table and the transaction is
    committed after the paper has been processed.  If handling a paper
    fails, the error is printed, the browser goes back to ``url`` and the
    next paper is tried.

    Uses the module-level ``browser``, ``cursor`` and ``conn`` objects that
    the script entry point creates, and calls ``login()`` when the site
    redirects to a login page.

    Args:
        url: Address of the listing page to return to after each paper.
    """
    time.sleep(random.randint(1, 3))
    # NOTE(review): the leading space in this class name is kept exactly as
    # the original wrote it — confirm it is intentional.
    item_element = browser.find_elements_by_class_name(' js-go-summary')
    # Compiled once; the same pattern is applied to every question below.
    answer_pattern = re.compile('正确答案: (.*)?你的答案: ')
    print('获取当前页的数据')
    for item_index, _ in enumerate(item_element):
        # One failing paper must not abort the whole page: any error below
        # sends the browser back to the listing and moves on to the next one.
        try:
            print('访问面试题，第{}题'.format(item_index+1))
            # Scroll a further 500px for every five entries (presumably so the
            # entry about to be clicked is in view).
            browser.execute_script("window.scrollTo(0,{})".format(500 * (item_index // 5)))
            # The entries are looked up again on every pass because the page
            # is reloaded after each paper.
            browser.find_elements_by_class_name(' js-go-summary')[item_index].click()
            print('提交试卷')
            paper_name = browser.find_element_by_xpath('//span[@class="js-paper-name"]').text
            browser.find_element_by_id("next").click()
            if 'login' in browser.current_url:
                login()
            try:
                # Finish early and confirm in the pop-up dialog.
                browser.find_element_by_id('aheadFinish').click()
                browser.find_element_by_xpath('//div[@class="pop-footer clearfix"]/a[1]').click()
            except Exception:
                # Best-effort fallback kept from the original: press "next" again.
                browser.find_element_by_id('next').click()
            print('遍历题目和答案')
            browser.find_element_by_xpath('//ul[@class="menu clearfix"]/li[2]/a').click()
            li_element = browser.find_elements_by_xpath('//ul[@class="subject-num-list"]/li')
            time.sleep(random.randint(2, 4))
            for index, _ in enumerate(li_element):
                browser.find_elements_by_xpath('//ul[@class="subject-num-list"]/li')[index].click()
                question = browser.find_element_by_class_name('question-main').text
                try:
                    answer_get = browser.find_element_by_xpath(
                        '//div[@class="result-subject-item result-subject-answer"]/h1').text
                    answer = answer_pattern.findall(answer_get)
                    if answer:
                        answer_result = answer[0]
                    else:
                        # No answer in the header: fall back to the text of the
                        # free-form answer box.  This must be a single-element
                        # lookup — find_elements_* returns a list (no ``.text``)
                        # and an XPath string() expression selects no element.
                        try:
                            answer_result = browser.find_element_by_xpath(
                                '//div[@class="design-answer-box"]').text
                        except Exception:
                            answer_result = ''
                    result = [ele.text for ele in browser.find_elements_by_xpath(
                        '//div[@class="result-subject-item result-subject-answer"]/div/pre')]
                except Exception:
                    # The answer section is missing: store the question with
                    # empty answer fields instead of dropping it.
                    answer_result = ''
                    result = []
                tag = [ele.text for ele in browser.find_elements_by_xpath('//a[@class="tag-label"]')]
                cursor.execute('insert into nowcoder value (NULL,%s,%s,%s,%s,%s,%s,%s)',
                               (question, answer_result, '|'.join(result), browser.current_url, '|'.join(tag), paper_name, index))
        except Exception as e:
            # Report the failure (as the entry point's handler does), return to
            # the listing page and continue with the next paper.
            print(e)
            browser.get(url)
            continue
        print('*'*100)
        browser.get(url)
        conn.commit()
if __name__ == '__main__':
    # Script entry point: log in, then scrape the listing page by page until
    # the "next page" link can no longer be clicked.
    # ``url``, ``browser``, ``conn`` and ``cursor`` are module-level names
    # read by login() and get_page_data().
    url = 'https://www.nowcoder.com/login?callBack=https%3A%2F%2Fwww.nowcoder.com%2FcontestRoom'
    browser = webdriver.Chrome(executable_path='chromedriver.exe')
    conn = None
    cursor = None
    try:
        conn = pymysql.connect(host='172.16.100.90', user='root', passwd='123456', db='contestRoom', charset='utf8')
        cursor = conn.cursor()
        login()
        while True:
            # Rebind the global to the page currently shown so the scraper
            # returns to this listing page after each paper.
            url = browser.current_url
            print('正在访问:{}'.format(url))
            get_page_data(url)
            print('点击下一页')
            try:
                browser.find_element_by_link_text('下一页').click()
                print('Next')
            except Exception as e:
                # The link could not be found or clicked: report and stop paging.
                print(e)
                break
        print('完成')
    finally:
        # Release the cursor, the connection and the browser even when the
        # scrape aborts with an exception; otherwise they would stay open.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
        browser.quit()